{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pyopencl as cl\n",
    "from pyopencl import array\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def np_se(a, b):\n",
    "    return (a - b) ** 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def gpu_se(a, b, platform, device, context, program):\n",
    "\n",
    "    queue = cl.CommandQueue(context,\n",
    "                            properties=cl.command_queue_properties.\n",
    "                            PROFILING_ENABLE)\n",
    "    mem_flags = cl.mem_flags\n",
    "    a_buf = cl.Buffer(context,\n",
    "                      mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR,\n",
    "                      hostbuf=a)\n",
    "    b_buf = cl.Buffer(context,\n",
    "                      mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR, hostbuf=b)\n",
    "    error = np.empty_like(a)\n",
    "    destination_buf = cl.Buffer(context,\n",
    "                                mem_flags.WRITE_ONLY,\n",
    "                                error.nbytes)\n",
    "\n",
    "    exec_evt = program.mean_squared_error(queue, error.shape, None,\n",
    "                                          a_buf, b_buf, destination_buf)\n",
    "    exec_evt.wait()\n",
    "    elapsed = 1e-9*(exec_evt.profile.end - exec_evt.profile.start)\n",
    "\n",
    "    print(\"Execution time of OpenCL: %g s\" % elapsed)\n",
    "\n",
    "    cl.enqueue_copy(queue,\n",
    "                    error, destination_buf)\n",
    "\n",
    "    return error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "np.random.seed(51)\n",
    "a = np.random.rand(4096).astype(np.float32)\n",
    "b = np.random.rand(4096).astype(np.float32)\n",
    "\n",
    "\n",
    "platform = cl.get_platforms()[0]\n",
    "device = platform.get_devices()[2]\n",
    "context = cl.Context([device])\n",
    "\n",
    "program = cl.Program(context, \"\"\"\n",
    "    __kernel void mean_squared_error(__global const float *a,\n",
    "    __global const float *b, __global float *result)\n",
    "    {\n",
    "        int gid = get_global_id(0);\n",
    "        float temp = a[gid] - b[gid];\n",
    "        result[gid] =  temp * temp;\n",
    "    }\n",
    "        \"\"\").build()\n",
    "gpu_error = gpu_se(a, b, platform, device, context, program)\n",
    "\n",
    "np_error = np_se(a, b)\n",
    "print('GPU error', np.mean(gpu_error))\n",
    "print('NumPy error', np.mean(np_error))\n",
    "%time np_se(a, b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
